*This do-file creates two indicators (male_drop, fem_drop) flagging whether a first name should be considered a "miscode" for men or for women

* Session setup: start with no data in memory, then configure the session.
clear
* Memory request; presumably only relevant on older Stata releases that
* need manual memory allocation -- TODO confirm target Stata version.
set mem 700m
set more off
set logtype text
set matsize 800

* Drop any programs defined earlier in the session and close any open log.
* Both are prefixed with -capture- so they do not stop the do-file on error.
cap program drop _all
cap log close


* Load the name counts from namecount_male.dta and keep only the name and
* its count. The path is quoted so that a rawdir containing spaces still works.
use "${rawdir}namecount_male.dta", clear
rename count count_male
keep first count_male

* Bring in the counts from namecount_female.dta, matching on first name.
* A 1:1 merge requires -first- to uniquely identify rows in both files.
* Unmatched names from either side are kept (no keep() option is given).
mer 1:1 first using "${rawdir}namecount_female.dta"
rename count count_female
drop _merge

* A name found in only one of the two files has a missing count for the
* other one; recode those missing counts to zero.
foreach var of varlist count* {
	replace `var'=0 if `var'==.
}

* Share of each name's total count that is female / male.
* Stored as double: a float share compared against the double-precision
* literal 0.999 can land on either side of the threshold through rounding,
* so names exactly at the cutoff would be classified unreliably.
* If both counts are zero the share is 0/0 and therefore missing.
gen double pct_fem=count_female/(count_female+count_male)
gen double pct_male=count_male/(count_female+count_male)

* male_drop = 1 when the name is at least 99.9% female;
* fem_drop  = 1 when the name is at least 99.9% male.
* Stata treats missing as larger than any number, so a missing share would
* satisfy ">=0.999" and set both flags to 1; exclude missing shares explicitly.
gen male_drop=(pct_fem>=0.999) & !missing(pct_fem)
gen fem_drop=(pct_male>=0.999) & !missing(pct_male)

* Never flag one-character names (presumably initials -- TODO confirm).
foreach var of varlist *drop {
	replace `var'=0 if length(first)==1
}

* Sort by first name and write the result, overwriting any existing file.
* The path is quoted so that a tempdir containing spaces still works.
sort first
save "${tempdir}gender_miscode.dta", replace

